library(sasld)


# 12.1 ---------------------------------------------------------------
# Esempio 3
data(hsfa)

fit <- lm(RM1_BENCH ~ BRTF_60, data=hsfa)
summary(fit)

fit$coef[1] + fit$coef[2]*mean(hsfa$BRTF_60)
mean(hsfa$RM1_BENCH)
# -------------------------------------------------------------------




# 12.2 ---------------------------------------------------------------
# Esempio 6
cor(hsfa$RM1_BENCH,hsfa$BRTF_60)

# standardizzo
yz <- (hsfa$RM1_BENCH - mean(hsfa$RM1_BENCH))/sd(hsfa$RM1_BENCH)
xz <- (hsfa$BRTF_60 - mean(hsfa$BRTF_60))/sd(hsfa$BRTF_60)
cor(yz,xz)

fit.z <- lm(yz ~ xz)
fit.z$coefficients

# la correlazione non dipende dall'unità di misura
kg <- hsfa$RM1_BENCH/2.2
cor(kg,hsfa$BRTF_60)

# ma la regressione sì
lm(kg ~ hsfa$BRTF_60)

fit$coef/2.2


# Esempio 9
fit <- lm(RM1_BENCH ~ BRTF_60, data=hsfa)

devs <- sum((fit$fitted - mean(fit$fitted))^2)
devr <- sum(fit$res^2)
devt <- sum((hsfa$RM1_BENCH - mean(hsfa$RM1_BENCH))^2)
c(devs,devr,devt)

devs/devt
cor(hsfa$RM1_BENCH,hsfa$BRTF_60)^2

# -------------------------------------------------------------------




# 12.3 ---------------------------------------------------------------
# Esempio 11
fit <- lm(RM1_BENCH ~ BRTF_60, data=hsfa)
summary(fit)

cor.test(hsfa$BRTF_60,hsfa$RM1_BENCH)

# Esempio 12
confint(fit)
# -------------------------------------------------------------------




# 12.4 ---------------------------------------------------------------
# Esempio 13
data("georgia")

fit <- lm(CGPA ~ HSGPA, data=georgia)
fit$coef

rs <- rstandard(fit)
nok <- which(abs(rs) > 2)
rs[nok]

# Tabella 12.6
tmp <- cbind(georgia$HSGPA[nok],georgia$CGPA[nok],
             fit$fitted[nok],fit$res[nok],rs[nok])
colnames(tmp) <- c("HSGPA","CGPA","Previsto","Residuo","Res. stand.")
tmp

hist(rs)
qqn(rs)

sqrt(sum(fit$res^2)/fit$df.residual)

sfit <- summary(fit)
sfit$sigma


# IC per i valori previsti
# Esempio 16
# Tabella 12.7
data(hsfa)

fit <- lm(RM1_BENCH ~ BRTF_60, data=hsfa)
a <- predict(fit,interval="confidence")
b <- predict(fit,interval="prediction")
d <- predict(fit,se.fit=TRUE); d <- d$se.fit
tmp <- cbind(fit$fitted,d,a[,2:3],b[,2:3])
colnames(tmp) <- c("Fit","SE fit","lwr IC","upr IC","lwr IP","upr IP")
tmp[51,]


# la tavola dell'ANOVA
fit <- lm(RM1_BENCH ~ BRTF_60, data=hsfa)
anova(fit)

summary(fit)

sfit <- summary(fit)
c(sfit$fstatistic[1],sfit$coef[2,3]^2)
# -------------------------------------------------------------------




# 12.5 ---------------------------------------------------------------
data(facebook)

plot(fb$giorni,log10(fb$utenti))
cor(fb$giorni,log10(fb$utenti))

fit <- lm(log10(utenti) ~ giorni, data=fb)
fit

10^fit$coef

yh <- 1.9559 * 1.00275^fb$giorni

cbind(fb$giorni,fb$utenti,10^fit$fitted,yh)
# -------------------------------------------------------------------




# 12.6 ---------------------------------------------------------------
b0 <- 505
b1 <- 35
x <- c(0,1,2)
yh <- b0 + b1*x

round(c(505+rnorm(1,0,100),540+rnorm(1,0,100),575+rnorm(1,0,100)),0)

nrep <- 100000
set.seed(123456)
y2 <- 505 + rnorm(nrep,0,100)
y3 <- 540 + rnorm(nrep,0,100)
y4 <- 575 + rnorm(nrep,0,100)

y <- c(y2,y3,y4)
anni <- c(rep(2,nrep),rep(3,nrep),rep(4,nrep))

tapply(y,anni,mean)
tapply(y,anni,sd)
boxplot(y ~ anni,range=0)

x <- anni - 2
fit <- lm(y ~ x)
fit


# si può fare di più?
round(c(y2[1],y3[1],y4[1]),0)


Y <- cbind(y2,y3,y4)

job <- function(y) {
    x <- c(0,1,2)
    fit <- lm(y ~ x)
    fit$coef
}

B <- apply(Y,1,job) # qui impiega un po' di tempo

head(t(B))

apply(B,1,mean)

apply(B,1,sd)

# la devianza di x è 2; la varianza è 100^2
# quindi l'errore standard (vero) è
100/sqrt(2)

qqn(B[2,]) # distribuzione delle pendenze campionarie
qqn(B[1,]) # distribuzione delle intercette campionarie
# -------------------------------------------------------------------